
****************************************************************************************************************************
********** THIS .DO FILE FINALIZES THE CREATION OF THE DATASET FOR THE BALANCE TEST ON CANDIDATES CHARACTERISTICS **********
****************************************************************************************************************************

* Start from an empty session and raise the variable limit before any data are loaded
clear all
set maxvar 10000

*** Candidate characteristics and campaign finance data (long format: one observation per candidate) ***
use "${root}/data/processed/candidates_chars.dta"

* store the municipality code as a numeric variable
destring mun_code, replace

* report (without altering the data) duplicates on the candidate-level key
duplicates report mun_code year sequencial_candidato

* shorten variable names; the group rename maps old names onto new names position by position
rename (tot_cand_expenditures tot_com_expenditures tot_cand_revenues tot_com_revenues sequencial_candidato) ///
       (cand_exp              com_exp              cand_revenues     com_revenues     sq_candidato)

* keep the prepared candidate data in a temporary file for the merges further down
tempfile cand_chars
save `cand_chars'


*** Election results (long format), prepared for merging ***
use "${root}/data/processed/margin_of_victories_long.dta", clear

* Retain the first-ranked candidate and the candidate whose rank equals best_opposition
* (per the original author: the most-voted left and most-voted non-left candidates)
drop if rank != 1 & best_opposition != rank

* align variable names and storage types with the candidate data
rename ano_eleicao year
drop best_opposition
destring mun_code mayor_left, replace

* keep the prepared election data in a temporary file; it is reused for both candidate groups
tempfile margin_long
save `margin_long'


*** merge and save the data on left mayors ***
* Load the election data explicitly (as the non-left section does) so that this
* section does not depend on whatever happens to be in memory when it starts
use `margin_long', clear
keep if mayor_left==1

* attach candidate characteristics; the key identifies a single candidate in a given election
merge 1:1 year mun_code sq_candidato using `cand_chars'

* Original author's note: most of the non-matched from master are from year 2000
* (not in the candidate dataset); only 15 observations from 2004
tab year if _merge == 1
* candidates that appear only in the characteristics file are not needed
drop if _merge==2

drop _merge mayor_left
* prefix every candidate-level variable with left_ so that it stays distinguishable
* once the left and non-left candidates are placed side by side in the wide dataset
foreach car of varlist age dob education marital_status nome_candidato rank sex sigla_partido self_reported_wealth cand* com*  {
	rename `car' left_`car'
	}

tempfile left_candidates
save `left_candidates'


*** merge and save the data on non-left mayors ***
use `margin_long', clear
keep if mayor_left==0

* attach candidate characteristics, retaining master-only and matched observations
* and creating no _merge variable (observations found only in the using file are discarded)
merge 1:1 year mun_code sq_candidato using `cand_chars', keep(master match) nogenerate
drop mayor_left

* expand the list of candidate-level variables once, then prefix each of them with nonleft_
unab nonleft_chars : age dob education marital_status nome_candidato rank sex sigla_partido self_reported_wealth cand* com*
foreach v of local nonleft_chars {
	rename `v' nonleft_`v'
	}

tempfile conservative_candidates
save `conservative_candidates'


*** combine the left and non-left candidates in a wide format dataset (one row per municipality-year) ***
* master: the non-left candidate data still in memory from the section above
merge 1:1 mun_code year using `left_candidates', nogenerate

* bring in the sample indicators from the main sample
* (the _merge variable created by this merge is left in the data)
merge 1:1 mun_code year using "${root}/data/processed/final_sample.dta", ///
	keepusing(baseline_sample lame_duck oil_sample)

* declare the panel structure: municipality is the panel variable, election year the time variable
xtset mun_code year


*** prepare variables ***
* dummy for incumbent candidates
* The look-ups below read the previous observation with [_n-1], so the data must be
* ordered by municipality and year; sort explicitly rather than rely on an earlier
* command having left the data in that order.
sort mun_code year

* incumbent_mayor = name of the candidate recorded as the winner of the election held
* 4 years earlier in the same municipality (empty when that election is not observed).
* A missing lagged margin is excluded explicitly: Stata treats missing as larger than
* any number, so a bare ">= 0" would also be true when the margin is unknown.
gen incumbent_mayor=""
replace incumbent_mayor=left_nome_candidato[_n-1] if mun_code==mun_code[_n-1] & year-year[_n-1]==4 & margin_mayor_left[_n-1]>=0 & !missing(margin_mayor_left[_n-1])
replace incumbent_mayor=nonleft_nome_candidato[_n-1] if mun_code==mun_code[_n-1] & year-year[_n-1]==4 & margin_mayor_left[_n-1]<0

* year 2000 is dropped only after it has been used as the lag for the following election
drop if year == 2000

* `side'_incumbent: 1 if the candidate's name equals the previous winner's name,
* 0 if the names differ, missing if both names are empty.
* NOTE(review): when incumbent_mayor is empty (previous winner unknown) a named candidate
* is coded 0, not missing -- confirm this is the intended treatment.
foreach side in left nonleft {
	gen `side'_incumbent=.
	replace `side'_incumbent=1 if `side'_nome_candidato==incumbent_mayor & incumbent_mayor!=""
	replace `side'_incumbent=0 if `side'_nome_candidato!=incumbent_mayor 
	}
* Higher-education indicator for each candidate group:
*   1 = "More than High School"; 0 = any of the three lower categories listed below;
*   missing for every other value of the education variable
foreach grp in left nonleft {
	gen `grp'_higher_ed = 1 if `grp'_education=="More than High School"
	replace `grp'_higher_ed = 0 if inlist(`grp'_education, "Less than High School", "Some High School", "ILITERATE")
	* cross-check the recode against the original categories, missing values included
	tab `grp'_education `grp'_higher_ed, missing
	}
* Express the left candidate's spending as a share of the combined spending of the
* two relevant candidates (left + non-left), separately for each spending measure
foreach item in cand_exp com_exp {
	* scratch variable holding the two-candidate total; removed again right after use
	tempvar pair_total
	gen `pair_total' = left_`item' + nonleft_`item'
	gen left_`item'_share = left_`item' / `pair_total'
	drop `pair_total'
	}
* Year indicator variables yy1, yy2, ... (one for each distinct value of year)
tabulate year, generate(yy)

* exclude irregular elections (observations with irregular equal to 1)
keep if irregular != 1

*******************************************************
* Write the dataset that the balance test will read   *
*******************************************************
save "${root}/data/processed/final_candidate_characteristics.dta", replace
